import numpy as np
from tqdm import tqdm
import os
import shutil
from shutil import copyfile
import scipy
from scipy import misc
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import csv
import random
import glob
import cv2
import keras
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, GlobalAveragePooling2D, Flatten, BatchNormalization
We use a GeForce GTX 1060 GPU to execute the code. The following code checks the availability of compute devices.
# List the compute devices (CPU/GPU) visible to TensorFlow.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
import tensorflow as tf
# TF1-style session; log_device_placement logs which device runs each op.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# Central configuration: dataset locations and per-model cache/checkpoint paths.
config = {
    'data_directory' : './stl10',                       # raw STL-10 binary files
    'augmented_train_data' : './augmented_train_data',  # cached augmented training sets
    'ResNet50' : {
        'name' : 'ResNet50',
        # 197x197 RGB — presumably chosen as ResNet50's minimum supported
        # input size in this Keras version; confirm against keras.applications.
        'input_shape' : (197, 197, 3),
        'dataset' : 'STL-10',
        'features_file_train' : './bottleneck_features/ResNet50_STL-10_features_train.npz',
        'features_file_test' : './bottleneck_features/ResNet50_STL-10_features_test.npz',
        'model_file_saved' : './best_model_saved/ResNet50_STL-10_model_best.hdf5'
    }
}
# load the data
class load_data:
    """Readers for the raw STL-10 binary files (uint8 data)."""

    @staticmethod
    def read_labels(path_to_labels):
        """Read a label file and return 0-based class labels as a uint8 array."""
        with open(path_to_labels, 'rb') as f:
            # STL-10 labels are 1-based on disk; shift to 0-based for Keras.
            labels = np.fromfile(f, dtype=np.uint8) - 1
        return labels

    @staticmethod
    def read_all_images(path_to_data):
        """Read an image file and return an array shaped (N, 96, 96, 3)."""
        with open(path_to_data, 'rb') as f:
            # read whole file in uint8 chunks
            everything = np.fromfile(f, dtype=np.uint8)
            # On-disk layout is (N, channels, columns, rows); transpose each
            # image to the height x width x channels layout used downstream.
            images = np.reshape(everything, (-1, 3, 96, 96))
            images = np.transpose(images, (0, 3, 2, 1))
        return images
# Load the STL-10 train/test splits from the binary files.
x_train = load_data.read_all_images(config['data_directory'] + '/train_X.bin')
y_train = load_data.read_labels(config['data_directory'] + '/train_y.bin')
x_test = load_data.read_all_images(config['data_directory'] + '/test_X.bin')
y_test = load_data.read_labels(config['data_directory'] + '/test_y.bin')
# Drop any singleton dimensions from the label array.
y_train = np.squeeze(y_train)
print('Data loaded')
print("Number of training examples =", x_train.shape[0])
print("Number of testing examples =", x_test.shape[0])
print("Image data shape =", x_train.shape[1:])
print("Number of classes =", len(set(y_train)))
class Summary:
    """Visualization helpers for the labeled training data."""

    def __init__(self, X_train, y_train, n_classes):
        self.X_train = X_train
        self.y_train = y_train
        self.n_classes = n_classes
        # STL-10 class names indexed by the 0-based label.
        self.labels_text = ['Airplane', 'Bird', 'Car', 'Cat', 'Deer', 'Dog', 'Horse', 'Monkey', 'Ship', 'Truck']

    def labels_frequency(self):
        """Plot a histogram of label frequency."""
        hist, bins = np.histogram(self.y_train, bins=self.n_classes)
        width = 0.75 * (bins[1] - bins[0])
        center = (bins[:-1] + bins[1:]) / 2
        plt.bar(center, hist, align='center', width=width)
        plt.show()

    def random_data(self, n=10):
        """Return n images and their labels, sampled uniformly at random."""
        images = []
        labels = []
        for _ in range(n):
            # randrange excludes len(X_train); the original
            # randint(0, len(X_train)) included it and could raise IndexError.
            index = random.randrange(len(self.X_train))
            images.append(self.X_train[index])
            labels.append(self.y_train[index])
        return images, labels

    def images_from_a_folder(self, files):
        """Load every image matching the glob pattern `files`, converted to RGB."""
        images = []
        for img in glob.glob(files):
            image = cv2.imread(img)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            images.append(image)
        return images

    def display_data(self, images, labels, n_rows, n_cols):
        """Show an n_rows x n_cols grid of images titled with class names."""
        plt.figure(figsize=(30, 15), dpi=128)
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(30, 15))
        fig.subplots_adjust(hspace=.20, wspace=.15)
        axs = axs.ravel()
        for i in range(n_cols * n_rows):
            axs[i].axis('off')
            axs[i].imshow(images[i])
            axs[i].set_title(self.labels_text[labels[i]], fontsize=20)

    def image_histograms(self, images, labels, n_rows, n_cols):
        """Show grayscale intensity histograms for a grid of images."""
        plt.figure(figsize=(30, 15), dpi=128)
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(30, 15))
        fig.subplots_adjust(hspace=.20, wspace=.15)
        axs = axs.ravel()
        for i in range(n_cols * n_rows):
            axs[i].axis('off')
            # Histogram is drawn from the grayscale pixel values; the original
            # also computed cv2.calcHist here but never used the result.
            gray_img = cv2.cvtColor(images[i], cv2.COLOR_BGR2GRAY)
            axs[i].hist(gray_img.ravel(), 96, [0, 96])
            axs[i].set_title(self.labels_text[labels[i]], fontsize=20)
# Training data visualization
#########################################################################
s = Summary(x_train, y_train, 10)
# Class-frequency histogram of the training labels.
s.labels_frequency()
# Display random images with correct labels
images, labels = s.random_data(50)
s.display_data(images, labels, 5, 10)
# Grayscale intensity histograms for the same sample.
s.image_histograms(images, labels, 5, 10)
# one-hot encode the labels
y_train_categorical = np_utils.to_categorical(y_train, 10)
y_test_categorical = np_utils.to_categorical(y_test, 10)
print('Labels processed')
# Scale pixel values from [0, 255] to [0, 1]; the means printed before and
# after make the rescaling visible.
print("Training data mean before normalizing: ", np.mean(x_train))
x_train_normalized = (x_train)/255.0
print("Training data mean after normalizing: ", np.mean(x_train_normalized))
print("Test data mean before normalizing: ", np.mean(x_test))
x_test_normalized = (x_test)/255.0
print("Test data mean after normalizing: ", np.mean(x_test_normalized))
We intend to design a simple Convolutional Neural Network (CNN) architecture from scratch that preserves symmetry, with the following properties:
class ModelFromScratch():
    """A small symmetric CNN trained from scratch on STL-10."""

    def __init__(self):
        # Keras Model instance; built lazily by build().
        self.model = None

    def build(self, input_shape, conv_depths, c_size, fc_heights, act, n_classes):
        """Assemble the network.

        input_shape : shape of one input image, e.g. (96, 96, 3)
        conv_depths : one entry (filter count) per conv block
        c_size      : square convolution kernel size
        fc_heights  : one entry (unit count) per dense hidden layer
        act         : activation used for conv and dense hidden layers
        n_classes   : size of the softmax output layer
        """
        from keras.layers.convolutional import Conv2D, MaxPooling2D
        from keras.layers import BatchNormalization, Flatten
        from keras.models import Input, Model, Sequential
        inp = Input(input_shape)
        x = inp
        # One Conv -> BatchNorm -> MaxPool block per entry of conv_depths.
        for depth in conv_depths:
            x = Conv2D(depth, (c_size, c_size), activation=act)(x)
            x = BatchNormalization()(x)
            x = MaxPooling2D((2, 2), strides=(2, 2))(x)
        x = Flatten()(x)
        x = Dropout(0.5)(x)
        # Fully-connected head: one Dense + Dropout pair per entry.
        for height in fc_heights:
            x = Dense(height, activation=act)(x)
            x = Dropout(0.5)(x)
        x = Dense(n_classes, activation='softmax')(x)
        self.model = Model(inputs=inp, outputs=x)

    def compile(self, rate, beta_1, beta_2):
        """Compile with Adam and categorical cross-entropy."""
        opt = keras.optimizers.Adam(lr=rate, beta_1=beta_1, beta_2=beta_2)
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=opt,
                           metrics=['accuracy'])

    def _fit_on(self, file, epochs, x, y):
        """Shared training loop for fit/fit_augmented: resume from `file` when
        it exists, and checkpoint the best validation accuracy back to it."""
        checkpointer = ModelCheckpoint(filepath=file, monitor='val_acc', verbose=1, save_best_only=True)
        if os.path.isfile(file):
            self.model.load_weights(file)
        self.model.fit(x, y, batch_size=128, epochs=epochs, validation_split=0.2,
                       callbacks=[checkpointer], verbose=2, shuffle=True)

    def fit(self, file, epochs):
        """Train on the original normalized training data."""
        self._fit_on(file, epochs, x_train_normalized, y_train_categorical)

    def fit_augmented(self, file, epochs):
        """Train on the augmented normalized training data (module globals)."""
        self._fit_on(file, epochs, x_train_augmented_normalized, y_train_augmented_categorical)

    def test(self, file):
        """Load the best weights from `file` and print test-set accuracy."""
        self.model.load_weights(file)
        # evaluate test accuracy
        score = self.model.evaluate(x_test_normalized, y_test_categorical, verbose=2)
        accuracy = 100 * score[1]
        # print test accuracy
        print('Test accuracy: %.4f%%' % accuracy)

    def predict(self, img):
        """Return softmax class probabilities for a batch of images."""
        return self.model.predict(img)

    def summary(self):
        """Print the Keras model summary."""
        self.model.summary()
Define the following variables:
Upon tuning over a range of values for these hyperparameters, we have found consistently above 60% test accuracy with $N=3$, $m=80$, and $t=4$.
# Build, (re)train, and evaluate the from-scratch CNN on the original data.
model = ModelFromScratch()
model.build(x_train_normalized.shape[1:], [80, 80, 80], 4, [1280], 'relu', 10)
model.summary()
model.compile(rate=0.00009, beta_1=0.99, beta_2=0.999)
# Train only when no checkpoint for this configuration exists yet
# (the original used an `if ...: pass / else:` inversion of this guard).
if not os.path.isfile('model_80x3_4_1280_relu_original_5000.hdf5'):
    model.fit('model_80x3_4_1280_relu_original_5000.hdf5', epochs=50)
# Evaluate on the test subset once a checkpoint is available.
if os.path.isfile('model_80x3_4_1280_relu_original_5000.hdf5'):
    model.test('model_80x3_4_1280_relu_original_5000.hdf5')
Comment
We have been able to obtain a test accuracy more than 60% on the test subset with the weights trained on the original training dataset which was part of the goal of the project.
We use the ResNet50 pre-trained Imagenet model to extract features from STL-10 training dataset and then finetune it to use it as a classifier of the STL-10 dataset. We use the classifier (with at least 90% accuracy on the STL-10 test subset) to label the STL-10 unlabeled dataset.
class FeaturesExtractor:
    """Extracts (and caches) ResNet50 bottleneck features for STL-10 images."""

    def __init__(self, model_config):
        self.model_config = model_config
        self.name = model_config['name']
        self.input_shape = model_config['input_shape']
        self.features_file_train = model_config['features_file_train']
        self.features_file_test = model_config['features_file_test']
        self.model_file_saved = model_config['model_file_saved']
        # Headless ImageNet ResNet50; its predictions are bottleneck features.
        self.model = ResNet50(weights='imagenet', include_top=False, input_shape=self.input_shape)
        print('Output shape:', self.model.output_shape)
        print('model loaded')

    def extract_imageset_features(self, images, file=None):
        """Resize and preprocess `images`, return their bottleneck features,
        optionally saving them to `file` (npz, key 'features')."""
        print('Reshaping images')
        resized_images = np.array([scipy.misc.imresize(images[i], self.input_shape)
                                   for i in tqdm(range(0, len(images)))]).astype('float32')
        print('Images resized')
        print('Preprocessing images')
        preprocessed_input = preprocess_input(resized_images)
        print('Images preprocessed')
        print('Features extraction - starts')
        features = []
        for i in tqdm(range(0, len(images))):
            # Predict on the *preprocessed* input; the original predicted on
            # the raw resized images, silently discarding preprocess_input.
            img_features = self.model.predict(np.expand_dims(preprocessed_input[i], axis=0))
            features.append(img_features)
        features = np.squeeze(features)
        print('Features extraction - completed')
        if file is not None:
            print('Saving features...')
            np.savez(file, features=features)
            print('Features saved')
        return features

    def extract_image_features(self, image, file=None):
        """Return bottleneck features for a single image."""
        resized_image = scipy.misc.imresize(image, self.input_shape).astype('float32')
        preprocessed_input = preprocess_input(resized_image)
        # Feed the preprocessed image (the original fed the raw resized one).
        features = self.model.predict(np.expand_dims(preprocessed_input, axis=0))
        features = np.squeeze(features)
        return features

    def extract_train_features(self, X):
        """Load cached training features if present, else compute and cache them."""
        if os.path.exists(self.features_file_train):
            print('Bottleneck features detected (train)')
            features = np.load(self.features_file_train)['features']
            print('Bottleneck features (train) loaded')
        else:
            features = self.extract_imageset_features(X, file=self.features_file_train)
        return features

    def extract_test_features(self, X):
        """Load cached test features if present, else compute and cache them."""
        if os.path.exists(self.features_file_test):
            print('Bottleneck features detected (test)')
            features = np.load(self.features_file_test)['features']
            print('Bottleneck features (test) loaded')
        else:
            features = self.extract_imageset_features(X, file=self.features_file_test)
        return features

    def extract_unlabeled_imageset_features(self, images):
        """Return bottleneck features for unlabeled images (no caching)."""
        resized_images = np.array([scipy.misc.imresize(images[i], self.input_shape)
                                   for i in range(0, len(images))]).astype('float32')
        preprocessed_input = preprocess_input(resized_images)
        print('Features extraction - starts')
        features = []
        for i in tqdm(range(0, len(images))):
            img_features = self.model.predict(np.expand_dims(preprocessed_input[i], axis=0))
            features.append(img_features)
        features = np.squeeze(features)
        print('Features extraction - completed')
        return features

    def extract_unlabeled_features(self, X, i):
        """Load or compute features for unlabeled chunk `i` (config-keyed file)."""
        file = self.model_config["features_file_unlabeled_" + i]
        if os.path.exists(file):
            print('Bottleneck features detected (unlabeled)')
            features = np.load(file)['features']
            print('Bottleneck features (unlabeled) loaded')
        else:
            features = self.extract_imageset_features(X, file=file)
        return features
# ResNet50 has output shape = 1x1x2048
# Build the extractor and compute (or load cached) training-set features.
x = FeaturesExtractor(config['ResNet50'])
train_features = x.extract_train_features(x_train)
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
%matplotlib inline
# reshape bottleneck features + reduce dimensionality with t-SNE
if os.path.exists('tsne_features.npz'):
    print('tsne features detected (test)')
    tsne_features = np.load('tsne_features.npz')['tsne_features']
else:
    print('tsne features not detected (test)')
    print('calculating now ...')
    # fit_transform: project the features into two dimensions
    tsne_features = TSNE(learning_rate=100).fit_transform(train_features)
    # np.savez appends the .npz suffix, matching the existence check above.
    np.savez('tsne_features', tsne_features=tsne_features)
    print('tsne features obtained')
# plot the 2D embedding, colored by class label
plt.figure(figsize=(12,12))
plt.scatter(tsne_features[:,0], tsne_features[:,1], c=plt.cm.jet(y_train/10), s=10, edgecolors='none')
plt.show()
Analysis:
The bottleneck features of ResNet50 provide an encouraging result, as shown in the plot above, where points representing same-class objects lie in the same neighborhood of the 2D plot. Thus, training a classifier on the bottleneck features should provide better validation and test performance.
def Model(input_shape):
    """Build the dense classifier head trained on ResNet50 bottleneck features."""
    net = Sequential()
    # Input projection with batch norm and dropout.
    net.add(Dense(512, activation='relu', input_shape=input_shape))
    net.add(BatchNormalization(axis=-1))
    net.add(Dropout(0.5))
    # Six hidden blocks; 6 gives 93.850% test acc.
    for _ in range(6):
        net.add(Dense(256, activation='relu'))
        net.add(BatchNormalization(axis=-1))
        # A Dropout(0.3) layer was tried here and left disabled.
    # 10-way softmax output.
    net.add(Dense(10, activation='softmax'))
    return net
# Build and train the dense classifier head on the bottleneck features.
modelResNet50 = Model(train_features.shape[1:])
modelResNet50.summary()
modelResNet50.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Checkpoint whichever weights give the best validation accuracy.
checkpointer = ModelCheckpoint(filepath=config['ResNet50']['model_file_saved'], monitor='val_acc',
                               verbose=1, save_best_only=True)
# Resume from a previous best checkpoint when one exists.
if os.path.isfile(config['ResNet50']['model_file_saved']):
    modelResNet50.load_weights(config['ResNet50']['model_file_saved'])
modelResNet50.fit(train_features, y_train_categorical, batch_size=30, epochs=100, validation_split=0.2,
                  callbacks=[checkpointer], verbose=2, shuffle=True)
test_features = x.extract_test_features(x_test)
# load the weights that yielded the best validation accuracy
# model.load_weights(config['ResNet50']['model_file_saved'])
modelResNet50.load_weights(config['ResNet50']['model_file_saved'])
# evaluate test accuracy
score = modelResNet50.evaluate(test_features, y_test_categorical, verbose=2)
accuracy = 100*score[1]
# print test accuracy
print('Test accuracy: %.4f%%' % accuracy)
# Load the unlabeled STL-10 split.
x_unlabeled = load_data.read_all_images(config['data_directory'] + '/unlabeled_X.bin')
print('Unlabeled data loaded', x_unlabeled.shape)
class SummaryUnlabeled:
    """Labeling and visualization helpers for the unlabeled STL-10 images.

    Relies on the module-level feature extractor `x`, classifier
    `modelResNet50`, and image array `x_unlabeled`.
    """

    def __init__(self):
        # STL-10 class names indexed by the 0-based label.
        self.labels_text = ['Airplane', 'Bird', 'Car', 'Cat', 'Deer', 'Dog', 'Horse', 'Monkey', 'Ship', 'Truck']

    def random_data(self, n=10):
        """Return n unlabeled images sampled uniformly at random."""
        images = []
        for _ in range(n):
            # randrange excludes len(x_unlabeled); the original
            # randint(0, len(x_unlabeled)) included it and could raise IndexError.
            index = random.randrange(len(x_unlabeled))
            images.append(x_unlabeled[index])
        return images

    def get_labels(self, images):
        """Return (predicted labels, top-class probabilities) for `images`."""
        labels = []
        values = []
        for i in range(len(images)):
            pic_features = x.extract_image_features(images[i])
            preds = modelResNet50.predict(np.expand_dims(pic_features, axis=0))
            labels.append(np.argmax(preds))
            values.append(np.max(preds))
        return labels, values

    def get_indices(self, threshold, how_many):
        """Return indices of up to `how_many` unlabeled images whose top-class
        probability is at least `threshold`."""
        indices = []
        count = 0
        for i in range(0, len(x_unlabeled)):
            pic_features = x.extract_image_features(x_unlabeled[i])
            preds = modelResNet50.predict(np.expand_dims(pic_features, axis=0))
            if np.max(preds) >= threshold:
                indices.append(i)
                count = count + 1
            # Stop early once enough confident images were found.
            if count == how_many:
                return indices
        return indices

    def display_data(self, images, labels, values, n_rows, n_cols):
        """Show a grid of images titled 'ClassName (probability)'."""
        plt.figure(figsize=(30, 30), dpi=80)
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(30, 30))
        fig.subplots_adjust(hspace=.25, wspace=.25)
        axs = axs.ravel()
        for i in range(n_cols * n_rows):
            axs[i].axis('off')
            axs[i].imshow(images[i])
            str1 = self.labels_text[labels[i]] + ' (' + str(np.around(values[i], decimals=3)) + ')'
            axs[i].set_title(str1)

    def display_data_more(self, images, labels, values, figh, figw, n_rows, n_cols):
        """Like display_data, with a configurable figure size and larger titles."""
        plt.figure(figsize=(figw, figh), dpi=128)
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(figw, figh))
        fig.subplots_adjust(hspace=.20, wspace=.15)
        axs = axs.ravel()
        for i in range(n_cols * n_rows):
            axs[i].axis('off')
            axs[i].imshow(images[i])
            str1 = self.labels_text[labels[i]] + ' (' + str(np.around(values[i], decimals=3)) + ')'
            axs[i].set_title(str1, fontsize=20)
# Predict labels for 200 random unlabeled images and display the grid.
s = SummaryUnlabeled()
images = s.random_data(200)
labels, values = s.get_labels(images)
s.display_data_more(images, labels, values, 60, 30, 20, 10)
Analysis:
Given that the unlabeled dataset contains objects beyond the 10 object classes available in the labeled STL-10 dataset, it is natural that several approximations are made by the ResNet50-based classifier (which is itself only 93% accurate on the test subset of the STL-10 dataset). Now, the important question is how close the approximation is with respect to the features of the corresponding STL-10 label when the softmax probability for the labeling is close to 1.0, say more than 0.90. We list a few cases (since the images are random, the cases below may not exactly correspond to different runs of the above images):
It appears that the softmax probability is higher when the features are more similar to the corresponding STL-10 class.
# Show up to 1000 unlabeled images whose predicted probability is >= 0.999.
indices = s.get_indices(threshold=0.999, how_many=1000)
images = []
for i in range(len(indices)):
    images.append(x_unlabeled[indices[i]])
labels, values = s.get_labels(images)
s.display_data_more(images, labels, values, 60, 30, 20, 10)
Analysis
Since the image distribution is similar but more diverse in the unlabeled STL-10 dataset, we have a number of wrong identifications with very high probability (0.999 or more). Let us look at a few such cases:
Even though in the test subset, there is no Cheetah, Leopard, Raccoon, Lioness, Cougar, adding these images as cats in the training dataset may enhance and diversify the features of cats and improve performance on the test subset. A similar assumption may be applicable for the classes: dog, horse, monkey, deer, truck, etc.
Since we have a large unlabeled image dataset, we restrict our augmentation of the training dataset on adding images selected from that unlabeled dataset based on the following assumptions:
class AugmentTraining:
    """Augments the training set with confidently pseudo-labeled unlabeled images."""

    def __init__(self, threshold, how_many):
        self.threshold = threshold  # minimum softmax probability to accept a label
        self.how_many = how_many    # maximum number of images to add
        # Filled by get_labeled_images() and consumed by
        # get_preprocessed_augmented_training_data().
        self.images = []
        self.labels = []

    def get_labeled_images(self):
        """Collect up to how_many high-confidence images with predicted labels."""
        print("Collecting images")
        s = SummaryUnlabeled()
        indices = s.get_indices(self.threshold, self.how_many)
        images = [x_unlabeled[idx] for idx in indices]
        labels, values = s.get_labels(images)
        print("Images collected")
        # Remember the selection on the instance so the augmentation step no
        # longer depends on module-level `images`/`labels` globals (as the
        # original implementation did).
        self.images = images
        self.labels = labels
        return images, labels

    def get_preprocessed_augmented_training_data(self):
        """Append the collected images/labels to the training set, then
        normalize the images and one-hot encode the labels."""
        print("Augmenting training data")
        x_train_augmented = x_train.tolist()
        y_train_augmented = y_train.tolist()
        for img, lab in zip(self.images, self.labels):
            x_train_augmented.append(img)
            y_train_augmented.append(lab)
        x_train_augmented = np.array(x_train_augmented)
        y_train_augmented = np.array(y_train_augmented)
        print("Augmentation completed")
        print("Preprocessing augmented data")
        x_train_augmented_normalized = (x_train_augmented) / 255.0
        y_train_augmented_categorical = np_utils.to_categorical(y_train_augmented, 10)
        print("Preprocessing completed")
        return x_train_augmented_normalized, y_train_augmented_categorical
def labels_frequency(y):
    """Plot a bar chart of how often each of the 10 labels occurs in y."""
    counts, edges = np.histogram(y, bins=10)
    bin_width = edges[1] - edges[0]
    centers = 0.5 * (edges[:-1] + edges[1:])
    plt.bar(centers, counts, align='center', width=0.75 * bin_width)
    plt.show()
# Augmented-training experiments: grow the training set in steps of roughly
# 4000 pseudo-labeled images (9000, 13000, ..., 25000 total) and retrain the
# from-scratch CNN each time. Each step caches its augmented dataset as an
# npz file and its best weights as an hdf5 checkpoint.
#
# Fixes relative to the original: the misspelled `lables = None` (which left
# `labels` alive and bound a junk name) is corrected, and the
# `if isfile: pass / else:` inversions are rewritten as direct guards.

# 9000-image run: original 5000 + ~4000 pseudo-labeled images.
if os.path.isfile(config['augmented_train_data'] + '/train_9000.npz'):
    x_train_augmented_normalized = np.load(config['augmented_train_data'] + '/train_9000.npz')['x']
    y_train_augmented_categorical = np.load(config['augmented_train_data'] + '/train_9000.npz')['y']
    print('Loaded: ', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
else:
    aug = AugmentTraining(0.999, 4000)
    images, labels = aug.get_labeled_images()
    x_train_augmented_normalized, y_train_augmented_categorical = aug.get_preprocessed_augmented_training_data()
    # Release the raw selection before saving the large arrays.
    images = None
    labels = None
    print('Computed', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
    np.savez_compressed(config['augmented_train_data'] + "/train_9000.npz",
                        x=x_train_augmented_normalized,
                        y=y_train_augmented_categorical)
labels_frequency(np.argmax(y_train_augmented_categorical, axis=1))
# Train on the 9000-image set only when no checkpoint exists yet.
if not os.path.isfile('model_80x3_4_1280_relu_augmented_9000.hdf5'):
    model = ModelFromScratch()
    model.build(x_train_normalized.shape[1:], [80, 80, 80], 4, [1280], 'relu', 10)
    model.compile(rate=0.00009, beta_1=0.99, beta_2=0.999)
    model.fit_augmented('model_80x3_4_1280_relu_augmented_9000.hdf5', 50)
if os.path.isfile('model_80x3_4_1280_relu_augmented_9000.hdf5'):
    model.test('model_80x3_4_1280_relu_augmented_9000.hdf5')

# 13000-image run.
if os.path.isfile(config['augmented_train_data'] + '/train_13000.npz'):
    x_train_augmented_normalized = np.load(config['augmented_train_data'] + '/train_13000.npz')['x']
    y_train_augmented_categorical = np.load(config['augmented_train_data'] + '/train_13000.npz')['y']
    print('Loaded: ', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
else:
    aug = AugmentTraining(0.999, 8000)
    images, labels = aug.get_labeled_images()
    x_train_augmented_normalized, y_train_augmented_categorical = aug.get_preprocessed_augmented_training_data()
    images = None
    labels = None
    print('Computed', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
    np.savez_compressed(config['augmented_train_data'] + "/train_13000.npz",
                        x=x_train_augmented_normalized,
                        y=y_train_augmented_categorical)
labels_frequency(np.argmax(y_train_augmented_categorical, axis=1))
if not os.path.isfile('model_80x3_4_1280_relu_augmented_13000.hdf5'):
    model = ModelFromScratch()
    model.build(x_train_normalized.shape[1:], [80, 80, 80], 4, [1280], 'relu', 10)
    model.compile(rate=0.00009, beta_1=0.99, beta_2=0.999)
    model.fit_augmented('model_80x3_4_1280_relu_augmented_13000.hdf5', 50)
if os.path.isfile('model_80x3_4_1280_relu_augmented_13000.hdf5'):
    model.test('model_80x3_4_1280_relu_augmented_13000.hdf5')

# 17000-image run.
if os.path.isfile(config['augmented_train_data'] + '/train_17000.npz'):
    x_train_augmented_normalized = np.load(config['augmented_train_data'] + '/train_17000.npz')['x']
    y_train_augmented_categorical = np.load(config['augmented_train_data'] + '/train_17000.npz')['y']
    print('Loaded: ', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
else:
    aug = AugmentTraining(0.999, 12000)
    images, labels = aug.get_labeled_images()
    x_train_augmented_normalized, y_train_augmented_categorical = aug.get_preprocessed_augmented_training_data()
    images = None
    labels = None
    print('Computed', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
    np.savez_compressed(config['augmented_train_data'] + "/train_17000.npz",
                        x=x_train_augmented_normalized,
                        y=y_train_augmented_categorical)
labels_frequency(np.argmax(y_train_augmented_categorical, axis=1))
if not os.path.isfile('model_80x3_4_1280_relu_augmented_17000.hdf5'):
    model = ModelFromScratch()
    model.build(x_train_normalized.shape[1:], [80, 80, 80], 4, [1280], 'relu', 10)
    model.compile(rate=0.00009, beta_1=0.99, beta_2=0.999)
    model.fit_augmented('model_80x3_4_1280_relu_augmented_17000.hdf5', 50)
if os.path.isfile('model_80x3_4_1280_relu_augmented_17000.hdf5'):
    model.test('model_80x3_4_1280_relu_augmented_17000.hdf5')

# 21000-image run.
if os.path.isfile(config['augmented_train_data'] + '/train_21000.npz'):
    x_train_augmented_normalized = np.load(config['augmented_train_data'] + '/train_21000.npz')['x']
    y_train_augmented_categorical = np.load(config['augmented_train_data'] + '/train_21000.npz')['y']
    print('Loaded: ', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
else:
    aug = AugmentTraining(0.999, 16000)
    images, labels = aug.get_labeled_images()
    x_train_augmented_normalized, y_train_augmented_categorical = aug.get_preprocessed_augmented_training_data()
    images = None
    labels = None
    print('Computed', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
    np.savez_compressed(config['augmented_train_data'] + "/train_21000.npz",
                        x=x_train_augmented_normalized,
                        y=y_train_augmented_categorical)
labels_frequency(np.argmax(y_train_augmented_categorical, axis=1))
if not os.path.isfile('model_80x3_4_1280_relu_augmented_21000.hdf5'):
    model = ModelFromScratch()
    model.build(x_train_normalized.shape[1:], [80, 80, 80], 4, [1280], 'relu', 10)
    model.compile(rate=0.00009, beta_1=0.99, beta_2=0.999)
    model.fit_augmented('model_80x3_4_1280_relu_augmented_21000.hdf5', 50)
if os.path.isfile('model_80x3_4_1280_relu_augmented_21000.hdf5'):
    model.test('model_80x3_4_1280_relu_augmented_21000.hdf5')

# 25000-image run.
if os.path.isfile(config['augmented_train_data'] + '/train_25000.npz'):
    x_train_augmented_normalized = np.load(config['augmented_train_data'] + '/train_25000.npz')['x']
    y_train_augmented_categorical = np.load(config['augmented_train_data'] + '/train_25000.npz')['y']
    print('Loaded: ', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
else:
    aug = AugmentTraining(0.999, 20000)
    images, labels = aug.get_labeled_images()
    x_train_augmented_normalized, y_train_augmented_categorical = aug.get_preprocessed_augmented_training_data()
    images = None
    labels = None
    print('Computed', x_train_augmented_normalized.shape, ' ', y_train_augmented_categorical.shape)
    np.savez_compressed(config['augmented_train_data'] + "/train_25000.npz",
                        x=x_train_augmented_normalized,
                        y=y_train_augmented_categorical)
labels_frequency(np.argmax(y_train_augmented_categorical, axis=1))
if not os.path.isfile('model_80x3_4_1280_relu_augmented_25000.hdf5'):
    model = ModelFromScratch()
    model.build(x_train_normalized.shape[1:], [80, 80, 80], 4, [1280], 'relu', 10)
    model.compile(rate=0.00009, beta_1=0.99, beta_2=0.999)
    model.fit_augmented('model_80x3_4_1280_relu_augmented_25000.hdf5', 50)
if os.path.isfile('model_80x3_4_1280_relu_augmented_25000.hdf5'):
    model.test('model_80x3_4_1280_relu_augmented_25000.hdf5')
| Training set-size | Epochs | Training loss | Training accuracy | Validation loss | Validation accuracy | Test accuracy |
|---|---|---|---|---|---|---|
| 5000 (original) | 50 | 0.1259 | 96.10% | 1.1916 | 63.300% | 61.600% |
| 9000 | 50 | 0.0939 | 96.85% | 1.1868 | 65.667% | 67.200% |
| 13000 | 50 | 0.1165 | 96.15% | 0.9556 | 70.425% | 68.925% |
| 17000 | 50 | 0.0880 | 97.07% | 0.9687 | 72.059% | 71.638% |
| 21000 | 50 | 0.1491 | 95.09% | 0.8308 | 73.667% | 73.238% |
| 25000 | 50 | 0.0887 | 96.95% | 0.9341 | 74.200% | 75.638% |
Observations
# Classify a handful of images downloaded from the internet with the final
# from-scratch model. NOTE(review): at this point `s` is a SummaryUnlabeled,
# which has neither images_from_a_folder nor a 4-argument display_data, so the
# original code would fail here; use a Summary instance instead.
viz = Summary(x_train, y_train, 10)
images = viz.images_from_a_folder('./images/*.*')
labels = []
for i in range(len(images)):
    # Scale pixels to [0, 1] to match the normalized training inputs
    # (the original passed raw 0-255 values to the model).
    img = np.expand_dims(images[i], axis=0) / 255.0
    img_class = np.argmax(model.predict(img))
    labels.append(img_class)
viz.display_data(images, labels, 1, len(images))
With only 75% accuracy, it is expected that there will be misclassifications on randomly selected images. Yet, the model seems to perform quite well on this set of internet pictures (resized to 96 x 96 x 3).
Due to limitations in time and computing resources, we could not perform further experiments, but we have achieved the goals of this project and found a constructive way to improve the performance of a model built from scratch so that it performs consistently better on the STL-10 dataset. In other words, we have determined a scalable method to obtain better performance on the dataset.
Note that we have a state-of-the-art performance using a model from scratch on the STL-10 dataset but only with help of transfer learning while understanding the unlabeled data. We have obtained this performance without applying popular preprocessing and image augmentations techniques.
Since we have devised a process involving a basic model from scratch, we may consider the following to improve performance further: